範例

範例

COCO數據集是一個具有大規模目標檢測、影像分割和語意標註的數據集。
COCO數據集有5種標註類型,以json檔儲存,如下。
2.1 Object Instances(物件偵測)
2.2 Keypoint Detection(關鍵點檢測)
2.3 Image Captioning(圖像描述)
2.4 Stuff Segmentation(語意分割)
2.5 Panoptic Segmentation(全景分割)
YOLO模型的主要任務為Object Detection,因此我們採用Object Instances格式建立COCO標註(json檔)。
Object Instance標註格式
4.1 以JSON排版工具呈現

4.2 info:對於資料集的描述。如:中英數OCR資料集、創立年份、提供者名稱、資料集版本。

4.3 images:對於圖片的描述。如:圖片編號、檔案名稱、圖片尺寸。

4.4 annotations:圖片標籤資訊。如:bbox中索引0~3的四個值,依序代表bounding box左上角的x座標、y座標,以及寬度與高度。

4.5 categories:紀錄物件的標籤類別。

流程與Python函式
1.1 xml轉換為annos.txt
def get(root, name):
    """Return the list produced by ``root.findall(name)``.

    Args:
        root: Element (or any object exposing ``findall``) to search.
        name: Tag or path handed to ``findall`` unchanged.

    Returns:
        The matching elements; an empty list when nothing matches.
    """
    matches = root.findall(name)
    return matches
def get_and_check(root, name, length):
    """Find ``name`` under ``root`` and validate how many matches exist.

    Args:
        root: Element to search; its ``findall`` and ``tag`` are used.
        name: Tag or path handed to ``root.findall``.
        length: Expected number of matches. A value of ``0`` or less skips
            the exact-count check; ``1`` additionally unwraps the result.

    Returns:
        The single matching element when ``length == 1``; otherwise the
        list of matching elements.

    Raises:
        NotImplementedError: If nothing matches, or if ``length`` is
            positive and the number of matches differs from it.
    """
    found = root.findall(name)
    if not found:
        raise NotImplementedError('Can not find {} in {}.'
                                  .format(name, root.tag))
    if length > 0 and len(found) != length:
        # Two adjacent literals form one message; a single literal cannot
        # span physical lines without being a syntax error.
        message = ('The size of {} is supposed to be {}, '
                   'but is {}.')
        raise NotImplementedError(message.format(name, length, len(found)))
    if length == 1:
        return found[0]
    return found
def transfer_xml_to_annos(xmlPath, saveDir, class_names=None):
    """Append one text line per annotated box found in the given XML files.

    Each line written is
    ``<filename> <class_id> <xmin> <ymin> <xmax> <ymax>``, where
    ``class_id`` is the 1-based position of the object's name in the class
    list and ``xmin`` / ``ymin`` are the XML values minus one.

    Args:
        xmlPath: Sequence of XML file paths (``len()`` is used for the
            progress output).
        saveDir: Path of the text file to append to (a file path, despite
            the name). The file is opened once, so it is created even when
            ``xmlPath`` is empty.
        class_names: Ordered class names. When ``None`` the module-level
            ``classes`` list is used, which keeps two-argument calls working.

    Raises:
        ValueError: If an object's name is not in the class list.
        NotImplementedError: Propagated from ``get_and_check`` when a
            required tag is missing or duplicated.
    """
    if class_names is None:
        # Fall back to the global populated by the script entry point.
        class_names = classes

    # Resolve every name to its 1-based id once instead of scanning the list
    # per object. setdefault keeps the first position of a duplicated name,
    # which is what list.index would return.
    label_of = {}
    for position, class_name in enumerate(class_names, 1):
        label_of.setdefault(class_name, position)

    total = len(xmlPath)
    # Open the output a single time (append mode) rather than once per XML.
    with open(saveDir, "a") as bbox:
        for n, xml in enumerate(xmlPath, 1):
            root = ET.parse(xml).getroot()
            # Image file name recorded in the XML.
            filename = get_and_check(root, 'filename', 1).text
            # One output line per annotated object.
            for obj in get(root, 'object'):
                category = get_and_check(obj, 'name', 1).text
                if category not in label_of:
                    raise ValueError('Unknown category {!r} in {}'
                                     .format(category, xml))
                label_index = str(label_of[category])
                bndbox = get_and_check(obj, 'bndbox', 1)
                xmin = int(get_and_check(bndbox, 'xmin', 1).text) - 1
                ymin = int(get_and_check(bndbox, 'ymin', 1).text) - 1
                xmax = int(get_and_check(bndbox, 'xmax', 1).text)
                ymax = int(get_and_check(bndbox, 'ymax', 1).text)
                bbox.write(filename + ' {} {} {} {} {}\n'
                           .format(label_index, xmin, ymin, xmax, ymax))
            print('※ 第{:3d}個xml檔案完成'.format(n))
            print('※ 剩{:3d}個需轉換'.format(total - n))
            print("-" * 35)
1.2 將圖片依照比例分配train與val資料集
def train_val_split(source, ratio):
    """Randomly partition the entries of ``<source>/images`` into two lists.

    Args:
        source: Dataset root directory containing an ``images`` folder.
        ratio: Fraction of entries assigned to the first (train) list; the
            count is truncated with ``int``.

    Returns:
        ``(train_list, val_list)`` of entry names in shuffled order.
    """
    image_dir = os.path.join(source, 'images')
    names = os.listdir(image_dir)
    # In-place shuffle so the split is not tied to directory order.
    random.shuffle(names)
    cut = int(len(names) * ratio)
    return names[:cut], names[cut:]
1.3 將標籤轉換成coco格式,並以json格式存檔。資料夾需包含images(圖片資料夾)、annos.txt(bbox標記)與classes.txt(類別清單),產生的json檔存於annotations資料夾。
def transfer_and_save_coco(source, split_list, dataset, phase):
    """Fill ``dataset`` with image/box records and dump it as JSON.

    Reads ``<source>/annos.txt`` (lines of
    ``<filename> <class_id> <xmin> <ymin> <xmax> <ymax>``), and for every
    name in ``split_list`` appends one entry to ``dataset['images']`` plus
    one entry to ``dataset['annotations']`` per matching line. The result is
    written to ``<source>/annotations/<phase>.json``.

    Args:
        source: Dataset root holding ``annos.txt`` and ``images/``.
        split_list: Image file names belonging to this split. The position
            in this list becomes the image ``id``.
        dataset: Dict with ``'images'`` and ``'annotations'`` lists; it is
            mutated in place.
        phase: Base name of the JSON file to write.

    Raises:
        OSError: If an image cannot be read by ``cv2.imread``.
    """
    # Index the annotation lines by file name once, so every image costs a
    # dict lookup instead of a rescan of the whole file.
    boxes_by_file = {}
    with open(os.path.join(source, 'annos.txt')) as tr:
        for line_no, line in enumerate(tr):
            # Split from the right: the five trailing fields never contain
            # whitespace, so a file name with spaces stays in one piece.
            parts = line.strip().rsplit(None, 5)
            if not parts:
                continue  # blank line
            boxes_by_file.setdefault(parts[0], []).append((line_no, parts))

    for k, index in enumerate(split_list):
        # Read the image only to obtain its pixel dimensions.
        image_path = os.path.join(source, 'images/') + index
        im = cv2.imread(image_path)
        if im is None:
            # imread signals failure by returning None instead of raising.
            raise OSError('Cannot read image: {}'.format(image_path))
        img_height, img_width = im.shape[:2]
        dataset['images'].append({'file_name': index,
                                  'id': k,
                                  'width': img_width,
                                  'height': img_height})
        for i, parts in boxes_by_file.get(index, ()):
            cls_id = parts[1]
            x1, y1, x2, y2 = (float(value) for value in parts[2:6])
            # Stored as [x_min, y_min, width, height]; clamp inverted boxes.
            box_width = max(0, x2 - x1)
            box_height = max(0, y2 - y1)
            dataset['annotations'].append({
                'area': box_width * box_height,
                'bbox': [x1, y1, box_width, box_height],
                'category_id': int(cls_id),
                # The line number in annos.txt serves as the annotation id.
                'id': i,
                'image_id': k,
                'iscrowd': 0,
                # Left empty for detection. For segmentation, a rectangle
                # would be the four corners clockwise from the top-left:
                # [[x1, y1, x2, y1, x2, y2, x1, y2]] (plus 'ignore': 0).
                'segmentation': []
            })
        print('   {} images handled'.format(k + 1))

    # Write the JSON file, creating the output folder when needed.
    folder = os.path.join(source, 'annotations')
    os.makedirs(folder, exist_ok=True)
    json_name = os.path.join(source, 'annotations/{}.json'.format(phase))
    with open(json_name, 'w') as f:
        json.dump(dataset, f)
# 生成train與val之coco格式json檔
def txt_to_coco_json(source, classes, split_list, phase):
    """Build the dataset skeleton for one split and hand it off for saving.

    Args:
        source: Dataset root directory, forwarded unchanged.
        classes: Iterable of class names; ids are assigned from 1 in order.
        split_list: Image file names belonging to this split.
        phase: Base name of the JSON file, also shown in progress output.
    """
    # Category ids are 1-based positions in ``classes``.
    categories = [
        {'id': class_id, 'name': class_name, 'supercategory': 'mark'}
        for class_id, class_name in enumerate(classes, start=1)
    ]
    info = {'description': '', 'url': '', 'version': '1.0',
            'year': 2022, 'contributor': 'James',
            'date_created': ''}
    # Container for image records and labels of this split.
    dataset = {
        'info': info,
        'categories': categories,
        'annotations': [],
        'images': [],
        'type': 'instances',
    }
    # Convert the split and store it as JSON.
    print('※ 開始轉換{}'.format(phase))
    transfer_and_save_coco(source, split_list, dataset, phase)
    print('※ {}.json Done'.format(phase))
1.4 移動圖片到train與val資料夾
def split_images_to_train_and_val(source, train_list, val_list):
    """Move images out of ``<source>/images`` into the two split folders.

    Args:
        source: Dataset root directory.
        train_list: File names to move into ``<source>/train2017``.
        val_list: File names to move into ``<source>/val2017``.
    """
    train_dir = os.path.join(source, 'train2017')
    val_dir = os.path.join(source, 'val2017')
    # Create both target folders before moving anything.
    for directory in (train_dir, val_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)
    for names, directory in ((train_list, train_dir), (val_list, val_dir)):
        # Joining with '' yields the folder path with a trailing separator.
        destination = os.path.join(directory, '')
        for name in names:
            shutil.move(source + '/images/' + name, destination)
    print('移動圖片到train與val資料夾 Done')
完整程式碼
import shutil
import random
import json
import cv2
import os
import xml.etree.ElementTree as ET
# ----------------------------------Step1----------------------------------
def get(root, name):
    """Return the list produced by ``root.findall(name)``.

    Args:
        root: Element (or any object exposing ``findall``) to search.
        name: Tag or path handed to ``findall`` unchanged.

    Returns:
        The matching elements; an empty list when nothing matches.
    """
    matches = root.findall(name)
    return matches
def get_and_check(root, name, length):
    """Find ``name`` under ``root`` and validate how many matches exist.

    Args:
        root: Element to search; its ``findall`` and ``tag`` are used.
        name: Tag or path handed to ``root.findall``.
        length: Expected number of matches. A value of ``0`` or less skips
            the exact-count check; ``1`` additionally unwraps the result.

    Returns:
        The single matching element when ``length == 1``; otherwise the
        list of matching elements.

    Raises:
        NotImplementedError: If nothing matches, or if ``length`` is
            positive and the number of matches differs from it.
    """
    found = root.findall(name)
    if not found:
        raise NotImplementedError('Can not find {} in {}.'
                                  .format(name, root.tag))
    if length > 0 and len(found) != length:
        # Two adjacent literals form one message; a single literal cannot
        # span physical lines without being a syntax error.
        message = ('The size of {} is supposed to be {}, '
                   'but is {}.')
        raise NotImplementedError(message.format(name, length, len(found)))
    if length == 1:
        return found[0]
    return found
def transfer_xml_to_annos(xmlPath, saveDir, class_names=None):
    """Append one text line per annotated box found in the given XML files.

    Each line written is
    ``<filename> <class_id> <xmin> <ymin> <xmax> <ymax>``, where
    ``class_id`` is the 1-based position of the object's name in the class
    list and ``xmin`` / ``ymin`` are the XML values minus one.

    Args:
        xmlPath: Sequence of XML file paths (``len()`` is used for the
            progress output).
        saveDir: Path of the text file to append to (a file path, despite
            the name). The file is opened once, so it is created even when
            ``xmlPath`` is empty.
        class_names: Ordered class names. When ``None`` the module-level
            ``classes`` list is used, which keeps two-argument calls working.

    Raises:
        ValueError: If an object's name is not in the class list.
        NotImplementedError: Propagated from ``get_and_check`` when a
            required tag is missing or duplicated.
    """
    if class_names is None:
        # Fall back to the global populated by the script entry point.
        class_names = classes

    # Resolve every name to its 1-based id once instead of scanning the list
    # per object. setdefault keeps the first position of a duplicated name,
    # which is what list.index would return.
    label_of = {}
    for position, class_name in enumerate(class_names, 1):
        label_of.setdefault(class_name, position)

    total = len(xmlPath)
    # Open the output a single time (append mode) rather than once per XML.
    with open(saveDir, "a") as bbox:
        for n, xml in enumerate(xmlPath, 1):
            root = ET.parse(xml).getroot()
            # Image file name recorded in the XML.
            filename = get_and_check(root, 'filename', 1).text
            # One output line per annotated object.
            for obj in get(root, 'object'):
                category = get_and_check(obj, 'name', 1).text
                if category not in label_of:
                    raise ValueError('Unknown category {!r} in {}'
                                     .format(category, xml))
                label_index = str(label_of[category])
                bndbox = get_and_check(obj, 'bndbox', 1)
                xmin = int(get_and_check(bndbox, 'xmin', 1).text) - 1
                ymin = int(get_and_check(bndbox, 'ymin', 1).text) - 1
                xmax = int(get_and_check(bndbox, 'xmax', 1).text)
                ymax = int(get_and_check(bndbox, 'ymax', 1).text)
                bbox.write(filename + ' {} {} {} {} {}\n'
                           .format(label_index, xmin, ymin, xmax, ymax))
            print('※ 第{:3d}個xml檔案完成'.format(n))
            print('※ 剩{:3d}個需轉換'.format(total - n))
            print("-" * 35)
# ----------------------------------Step2----------------------------------
# 將圖片依照比例分配train與val
def train_val_split(source, ratio):
    """Randomly partition the entries of ``<source>/images`` into two lists.

    Args:
        source: Dataset root directory containing an ``images`` folder.
        ratio: Fraction of entries assigned to the first (train) list; the
            count is truncated with ``int``.

    Returns:
        ``(train_list, val_list)`` of entry names in shuffled order.
    """
    image_dir = os.path.join(source, 'images')
    names = os.listdir(image_dir)
    # In-place shuffle so the split is not tied to directory order.
    random.shuffle(names)
    cut = int(len(names) * ratio)
    return names[:cut], names[cut:]
# 轉換coco格式dataset
def transfer_and_save_coco(source, split_list, dataset, phase):
    """Fill ``dataset`` with image/box records and dump it as JSON.

    Reads ``<source>/annos.txt`` (lines of
    ``<filename> <class_id> <xmin> <ymin> <xmax> <ymax>``), and for every
    name in ``split_list`` appends one entry to ``dataset['images']`` plus
    one entry to ``dataset['annotations']`` per matching line. The result is
    written to ``<source>/annotations/<phase>.json``.

    Args:
        source: Dataset root holding ``annos.txt`` and ``images/``.
        split_list: Image file names belonging to this split. The position
            in this list becomes the image ``id``.
        dataset: Dict with ``'images'`` and ``'annotations'`` lists; it is
            mutated in place.
        phase: Base name of the JSON file to write.

    Raises:
        OSError: If an image cannot be read by ``cv2.imread``.
    """
    # Index the annotation lines by file name once, so every image costs a
    # dict lookup instead of a rescan of the whole file.
    boxes_by_file = {}
    with open(os.path.join(source, 'annos.txt')) as tr:
        for line_no, line in enumerate(tr):
            # Split from the right: the five trailing fields never contain
            # whitespace, so a file name with spaces stays in one piece.
            parts = line.strip().rsplit(None, 5)
            if not parts:
                continue  # blank line
            boxes_by_file.setdefault(parts[0], []).append((line_no, parts))

    for k, index in enumerate(split_list):
        # Read the image only to obtain its pixel dimensions.
        image_path = os.path.join(source, 'images/') + index
        im = cv2.imread(image_path)
        if im is None:
            # imread signals failure by returning None instead of raising.
            raise OSError('Cannot read image: {}'.format(image_path))
        img_height, img_width = im.shape[:2]
        dataset['images'].append({'file_name': index,
                                  'id': k,
                                  'width': img_width,
                                  'height': img_height})
        for i, parts in boxes_by_file.get(index, ()):
            cls_id = parts[1]
            x1, y1, x2, y2 = (float(value) for value in parts[2:6])
            # Stored as [x_min, y_min, width, height]; clamp inverted boxes.
            box_width = max(0, x2 - x1)
            box_height = max(0, y2 - y1)
            dataset['annotations'].append({
                'area': box_width * box_height,
                'bbox': [x1, y1, box_width, box_height],
                'category_id': int(cls_id),
                # The line number in annos.txt serves as the annotation id.
                'id': i,
                'image_id': k,
                'iscrowd': 0,
                # Left empty for detection. For segmentation, a rectangle
                # would be the four corners clockwise from the top-left:
                # [[x1, y1, x2, y1, x2, y2, x1, y2]] (plus 'ignore': 0).
                'segmentation': []
            })
        print('   {} images handled'.format(k + 1))

    # Write the JSON file, creating the output folder when needed.
    folder = os.path.join(source, 'annotations')
    os.makedirs(folder, exist_ok=True)
    json_name = os.path.join(source, 'annotations/{}.json'.format(phase))
    with open(json_name, 'w') as f:
        json.dump(dataset, f)
# 生成train與val之coco格式json檔
def txt_to_coco_json(source, classes, split_list, phase):
    """Build the dataset skeleton for one split and hand it off for saving.

    Args:
        source: Dataset root directory, forwarded unchanged.
        classes: Iterable of class names; ids are assigned from 1 in order.
        split_list: Image file names belonging to this split.
        phase: Base name of the JSON file, also shown in progress output.
    """
    # Category ids are 1-based positions in ``classes``.
    categories = [
        {'id': class_id, 'name': class_name, 'supercategory': 'mark'}
        for class_id, class_name in enumerate(classes, start=1)
    ]
    info = {'description': '', 'url': '', 'version': '1.0',
            'year': 2022, 'contributor': 'James',
            'date_created': ''}
    # Container for image records and labels of this split.
    dataset = {
        'info': info,
        'categories': categories,
        'annotations': [],
        'images': [],
        'type': 'instances',
    }
    # Convert the split and store it as JSON.
    print('※ 開始轉換{}'.format(phase))
    transfer_and_save_coco(source, split_list, dataset, phase)
    print('※ {}.json Done'.format(phase))
# 移動圖片到train與val資料夾
def split_images_to_train_and_val(source, train_list, val_list):
    """Move images out of ``<source>/images`` into the two split folders.

    Args:
        source: Dataset root directory.
        train_list: File names to move into ``<source>/train2017``.
        val_list: File names to move into ``<source>/val2017``.
    """
    train_dir = os.path.join(source, 'train2017')
    val_dir = os.path.join(source, 'val2017')
    # Create both target folders before moving anything.
    for directory in (train_dir, val_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)
    for names, directory in ((train_list, train_dir), (val_list, val_dir)):
        # Joining with '' yields the folder path with a trailing separator.
        destination = os.path.join(directory, '')
        for name in names:
            shutil.move(source + '/images/' + name, destination)
    print('移動圖片到train與val資料夾 Done')
if __name__ == '__main__':
    source = './dataests3'
    # Read the class list (whitespace-separated names). ``classes`` must stay
    # a module-level name because transfer_xml_to_annos looks it up globally.
    with open(os.path.join(source, 'classes.txt')) as f:
        classes = f.read().strip().split()

    # [Step 1] Convert the XML files into annos.txt. Each line holds
    # imageName, classId, xMin, yMin, xMax, yMax -- one bbox per line
    # (class ids are counted from 1).
    print('【Step1】xml轉annos.txt')
    # Output path of annos.txt.
    saveDir = os.path.join(source, 'annos.txt')
    # The converter opens this file in append mode, so a file left over from
    # an earlier (possibly failed) run would duplicate every box.
    if os.path.exists(saveDir):
        os.remove(saveDir)
    # Folder holding the XML annotations.
    xmlDir = os.path.join(source, 'xmls/')
    # Only hand real XML files to the parser; stray directory entries would
    # make ET.parse fail.
    xmlPath = [xmlDir + i for i in os.listdir(xmlDir)
               if i.lower().endswith('.xml')]
    transfer_xml_to_annos(xmlPath, saveDir)
    print('=' * 60)

    # [Step 2] Convert the labels to COCO format and save them as JSON. The
    # dataset folder contains images (image folder), annos.txt (bbox
    # records), classes.txt (class list) and annotations (JSON output).
    print('【Step2】annos.txt轉coco,並以json格式儲存')
    # Assign the images to train and val by ratio.
    train_list, val_list = train_val_split(source, 0.9)
    # Write one COCO JSON file per split.
    txt_to_coco_json(source, classes, train_list, 'instances_train2017')
    print('-' * 35)
    txt_to_coco_json(source, classes, val_list, 'instances_val2017')
    print('-' * 35)
    # Move the images into the train and val folders.
    split_images_to_train_and_val(source, train_list, val_list)
    print('程式執行結束')
執行程式
3.1 執行前
資料夾結構

images

xmls

classes

3.2 執行後
執行結果

資料夾結構

annotations

train2017

val2017

讓我們繼續看下去...